#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" 
@author:pengfeifu
@file: xbiquge.py 
@version:
@time: 2021/11/06 
@email:1324734112@qq.com
@desc： 斗罗大陆网络爬虫
@function：常用函数
"""
import os
import random
import time

import requests
from bs4 import BeautifulSoup

# Send a regular desktop browser User-Agent so the site serves the pages.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
}

# Site root; chapter links on the index page are relative to it.
server_url = "https://www.xbiquge.la"

# Index page that lists every chapter of the book
book_server_url = "https://www.xbiquge.la/1/1710/"

# Local directory the chapter files are written to
file_save_url = "D:\\net\\book\\斗罗大陆"


def directory_mkdir(file_path):
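    """Create file_path if it does not exist, then switch into it."""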
    if not os.path.exists(file_path):
        os.makedirs(file_path)
    os.chdir(file_path)


def get_content(book_src):
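    """Fetch a single chapter page and return its cleaned-up text."""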
    print("正在请求路径：", book_src)
    book_res = requests.get(book_src, headers=headers)
    book_soup = BeautifulSoup(str(book_res.content, "utf8"), "html.parser")
    text_content = book_soup.find('div', class_="content_read").find("div", id="content")
    print(text_content.contents)
    # 获取div标签id属性content的内容 \xa0 是不间断空白符 &nbsp;
    text = text_content.text.replace('\xa0' * 4, '\n')
    return text


def file_write(contents, file_name):
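    """Append contents to file_name (UTF-8), creating the file if needed."""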
    try:
        # The with-block closes the file automatically, even on errors,
        # so no explicit close() is needed.
        with open(file_name, "a", encoding="utf-8") as f:
            f.write(contents)
    except IOError:
        print("Failed to write file:", file_name)


if __name__ == "__main__":
    # Make sure the output directory exists before downloading anything.
    directory_mkdir(file_save_url)
    rep = requests.get(book_server_url, headers=headers, timeout=10)
    soup = BeautifulSoup(rep.content.decode("utf-8"), "html.parser")
    a_list = soup.find("div", id="list").find_all("a")
    print("Douluo Dalu: %d chapters in total" % len(a_list))
    counter = 0
    for a in a_list:
        counter += 1
        # Pause between requests so we do not hammer the server.
        time.sleep(random.randint(1, 2))
        book_src = server_url + a.attrs["href"]
        # Fetch the chapter text from the site
        content = get_content(book_src)
        # Write it to disk, numbered so the files sort in reading order
        file_write(content, os.path.join(file_save_url, str(counter) + a.string + ".txt"))
